# Clear every (non-hidden) object from the current environment before running.
rm(list=ls())
library(tidyverse)
# All "Data/..." inputs and "Output/..." outputs below are resolved relative to
# this directory; the path is machine-specific and must be adjusted to run
# the script elsewhere.
setwd("~/Downloads/dataverse_files(3)")

# Congress: keep one row per (cited DOI, party_control) combination.
dat <- read_csv("Data/CongressDocumentsCitations.csv") %>%
  mutate(doi = dois_cited) %>%
  dplyr::select(doi, party_control) %>%
  distinct()

# Label each DOI by the party_control values it appears with:
# "Both" when it has D and R rows, otherwise "R" or "D" (NA when neither).
party_cite <- dat %>%
  group_by(doi) %>%
  summarise(
    party_cite = case_when(
      all(c("D", "R") %in% party_control) ~ "Both",
      "R" %in% party_control ~ "R",
      "D" %in% party_control ~ "D"
    )
  )

papers <- read_csv("Data/OvertonCitedPapers.csv")

# No `by` is supplied, so the join keys are all column names the two tables share.
party_cite <- left_join(party_cite, papers)

# Reload the full Congress citation table (`dat` was reduced to two columns above).
dat <- read_csv("Data/CongressDocumentsCitations.csv") %>%
  mutate(doi = dois_cited)

# Number of distinct policy documents citing each DOI.
num_pcites <- dat %>%
  group_by(doi) %>%
  summarise(num_pcites = n_distinct(policy_document_id))

# Empirical CDF of the per-paper citation counts; print its value at 14.
ecdf_pcites <- ecdf(num_pcites$num_pcites)
ecdf_pcites(14)

num_pcites

# One row per (doi, congress, party_control, policy document), with the
# per-DOI citation count attached (join keys are the shared column names).
dat <- dat %>%
  dplyr::select(doi, congress, party_control, policy_document_id) %>%
  distinct() %>%
  left_join(num_pcites)



tt_docs <- read_csv("Data/ThinkTankDocumentsCitations.csv")

# Think tanks: sum the Left / Right indicators per cited DOI, label the DOI
# "Both", "L" or "R" from those sums, then attach paper metadata.
ideo_cite <- tt_docs %>%
  group_by(dois_cited) %>%
  summarise(
    Left = sum(Left, na.rm = TRUE),
    Right = sum(Right, na.rm = TRUE)
  ) %>%
  mutate(
    party_cite = case_when(
      Left > 0 & Right > 0 ~ "Both",
      Left > 0 ~ "L",
      Right > 0 ~ "R"
    )
  ) %>%
  distinct() %>%
  left_join(papers, by = c("dois_cited" = "doi"))

# Number of distinct policy documents citing each DOI.
num_pcites_tt <- tt_docs %>%
  group_by(dois_cited) %>%
  summarise(num_pcites = n_distinct(policy_document_id))
num_pcites_tt

# Attach the counts (join keys are the shared column names) and drop the NA-DOI row.
ideo_cite <- ideo_cite %>%
  left_join(num_pcites_tt) %>%
  filter(!is.na(dois_cited))

# Citation-level table used by the permutation functions: one row per
# (DOI, side, policy document, pub_year) with complete DOI and document id.
tt_dat <- tt_docs %>%
  mutate(
    party_control = case_when(
      Left == 1 ~ "L",
      Right == 1 ~ "R"
    )
  ) %>%
  dplyr::select(dois_cited, party_control, policy_document_id, pub_year) %>%
  filter(!is.na(dois_cited), !is.na(policy_document_id)) %>%
  distinct() %>%
  left_join(num_pcites_tt)


# Null-model draw for Congress: shuffle `party_control` across all rows of
# `data`, then classify every (doi, num_pcites) group from the shuffled labels.
#
# data: table with columns doi, num_pcites and party_control.
# Returns an ungrouped table with doi, num_pcites, dem, rep (counts of "D" /
# "R" rows) and party_cite ("Both", "D", "R", or NA when both counts are 0).
permute_party <- function(data) {
  data$party_control <- sample(data$party_control)

  shuffled <- data %>%
    mutate(
      dem = as.numeric(party_control %in% "D"),
      rep = as.numeric(party_control %in% "R")
    )

  shuffled %>%
    group_by(doi, num_pcites) %>%
    summarise(dem = sum(dem), rep = sum(rep)) %>%
    ungroup() %>%
    mutate(
      party_cite = case_when(
        rep > 0 & dem > 0 ~ "Both",
        rep == 0 & dem > 0 ~ "D",
        rep > 0 & dem == 0 ~ "R"
      )
    )
}

# Observed Congress classification: same aggregation as the permuted version
# but using the `party_control` labels exactly as they appear in `data`.
#
# data: table with columns doi, num_pcites and party_control.
# Returns an ungrouped table with doi, num_pcites, dem, rep (counts of "D" /
# "R" rows) and party_cite ("Both", "D", "R", or NA when both counts are 0).
observed_party_cong <- function(data) {
  flagged <- data %>%
    mutate(
      dem = as.numeric(party_control %in% "D"),
      rep = as.numeric(party_control %in% "R")
    )

  flagged %>%
    group_by(doi, num_pcites) %>%
    summarise(dem = sum(dem), rep = sum(rep)) %>%
    ungroup() %>%
    mutate(
      party_cite = case_when(
        rep > 0 & dem > 0 ~ "Both",
        rep == 0 & dem > 0 ~ "D",
        rep > 0 & dem == 0 ~ "R"
      )
    )
}

# Null-model draw for think tanks: shuffle `party_control` across all rows of
# `data`, then classify every (dois_cited, num_pcites) group.
#
# data: table with columns dois_cited, num_pcites and party_control.
# Returns an ungrouped table with dois_cited, num_pcites, Left, Right (counts
# of "L" / "R" rows) and party_cite ("Both", "L", "R", or NA when both are 0).
permute_party_tt <- function(data) {
  data$party_control <- sample(data$party_control)

  shuffled <- data %>%
    mutate(
      Left = as.numeric(party_control %in% "L"),
      Right = as.numeric(party_control %in% "R")
    )

  shuffled %>%
    group_by(dois_cited, num_pcites) %>%
    summarise(Left = sum(Left), Right = sum(Right)) %>%
    ungroup() %>%
    mutate(
      party_cite = case_when(
        Right > 0 & Left > 0 ~ "Both",
        Right == 0 & Left > 0 ~ "L",
        Right > 0 & Left == 0 ~ "R"
      )
    )
}

# Observed think-tank classification: same aggregation as the permuted
# version but using the `party_control` labels exactly as they appear in `data`.
#
# data: table with columns dois_cited, num_pcites and party_control.
# Returns an ungrouped table with dois_cited, num_pcites, Left, Right (counts
# of "L" / "R" rows) and party_cite ("Both", "L", "R", or NA when both are 0).
observed_party_tt <- function(data) {
  flagged <- data %>%
    mutate(
      Left = as.numeric(party_control %in% "L"),
      Right = as.numeric(party_control %in% "R")
    )

  flagged %>%
    group_by(dois_cited, num_pcites) %>%
    summarise(Left = sum(Left), Right = sum(Right)) %>%
    ungroup() %>%
    mutate(
      party_cite = case_when(
        Right > 0 & Left > 0 ~ "Both",
        Right == 0 & Left > 0 ~ "L",
        Right > 0 & Left == 0 ~ "R"
      )
    )
}


# Fixed seed, then 100 label permutations per institution (Congress first,
# so the random-number stream is consumed in the same order on every run).
set.seed(8675309)
cong_perm <- lapply(seq_len(100), function(draw) permute_party(dat))
tt_perm <- lapply(seq_len(100), function(draw) permute_party_tt(tt_dat))




library(purrr)
# Share of "Both" papers in every permuted data set, restricted to papers with
# more than one policy citation (threshold 2) ...
cong_perm_dist_geq2 <- tibble(
  "value" = map_dbl(cong_perm, function(perm) {
    kept <- dplyr::filter(perm, num_pcites > 1)
    sum(kept$party_cite == "Both") / nrow(kept)
  }),
  "institution" = "Congress",
  "threshold" = 2
)
tt_perm_dist_geq2 <- tibble(
  "value" = map_dbl(tt_perm, function(perm) {
    kept <- dplyr::filter(perm, num_pcites > 1)
    sum(kept$party_cite == "Both") / nrow(kept)
  }),
  "institution" = "Think Tank",
  "threshold" = 2
)

# ... and over all papers (threshold 1).
cong_perm_dist_geq1 <- tibble(
  "value" = map_dbl(cong_perm, function(perm) sum(perm$party_cite == "Both") / nrow(perm)),
  "institution" = "Congress",
  "threshold" = 1
)
tt_perm_dist_geq1 <- tibble(
  "value" = map_dbl(tt_perm, function(perm) sum(perm$party_cite == "Both") / nrow(perm)),
  "institution" = "Think Tank",
  "threshold" = 1
)

permuted_overlap_results <- bind_rows(
  cong_perm_dist_geq2,
  tt_perm_dist_geq2,
  cong_perm_dist_geq1,
  tt_perm_dist_geq1
)



# Observed share of "Both" papers: count of "Both" papers divided by the
# number of distinct cited DOIs, for papers with 2+ policy citations ...
cong_observed_geq2 <-
  nrow(filter(observed_party_cong(dat), party_cite == "Both", num_pcites > 1)) /
  n_distinct(filter(dat, num_pcites > 1)$doi)
tt_observed_geq2 <-
  nrow(filter(observed_party_tt(tt_dat), party_cite == "Both", num_pcites > 1)) /
  n_distinct(filter(tt_dat, num_pcites > 1)$dois_cited)

# ... and for all papers.
cong_observed_geq1 <-
  nrow(filter(observed_party_cong(dat), party_cite == "Both")) /
  n_distinct(dat$doi)
tt_observed_geq1 <-
  nrow(filter(observed_party_tt(tt_dat), party_cite == "Both")) /
  n_distinct(tt_dat$dois_cited)

observed_results <- tibble(
  "value" = c(cong_observed_geq2, tt_observed_geq2, cong_observed_geq1, tt_observed_geq1),
  "institution" = c("Congress", "Think Tank", "Congress", "Think Tank"),
  "threshold_label" = c(
    "Papers with 2+ policy citations",
    "Papers with 2+ policy citations",
    "All papers with policy citations",
    "All papers with policy citations"
  )
)

observed_results


# Text annotations placed directly on the chart. The x positions (`value`) are
# hard-coded numbers, not derived from the results computed in this script.
direct_label_dat <- tibble(
  text = rep(c("Observed share", "Expected share\n(Null model)"), times = 4),
  institution = rep(c("Congress", "Think Tank"), each = 4),
  value = c(.12, .12, .37, .58, .11, .13, .26, .51),
  y = rep(c(.9, .25), times = 4),
  threshold = rep(rep(c(1, 2), each = 2), times = 2),
  threshold_label = rep(
    rep(
      c("All papers with policy citations", "Papers with 2+ policy citations"),
      each = 2
    ),
    times = 2
  )
)

# Human-readable label for each numeric threshold.
permuted_overlap_results <- permuted_overlap_results %>%
  mutate(
    threshold_label = case_when(
      threshold == 1 ~ "All papers with policy citations",
      threshold == 2 ~ "Papers with 2+ policy citations",
      TRUE ~ NA
    )
  )

library(tibble)
library(ggpmisc)
library(ggdist)
# Permuted (null) distributions of the bipartisan share per institution, with
# the observed shares drawn as vertical lines and direct text labels on top.
permuted_overlap_chart <- ggplot(
  permuted_overlap_results,
  aes(x = value, color = threshold_label, group = threshold_label)
) +
  stat_slabinterval() +
  geom_vline(
    data = observed_results,
    aes(xintercept = value, color = threshold_label),
    linetype = 4
  ) +
  geom_text(
    data = direct_label_dat,
    aes(y = y, label = text),
    size = 3,
    fontface = "bold"
  ) +
  facet_wrap(~institution, scales = "free_x", nrow = 2) +
  scale_x_continuous(
    "Percent of bipartisan cited papers",
    labels = scales::percent,
    limits = c(0, .65)
  ) +
  scale_color_brewer(
    "",
    palette = "Dark2",
    labels = c("All papers with policy citations", "Papers with 2+ policy citations")
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  )

ggsave(
  "Output/PermutedOverlap_Nonmatched.pdf",
  permuted_overlap_chart,
  width = 7,
  height = 6
)



##### End of main permuted overlap

#### Permuted overlap by slice 
#Congress
# Minimum-citation cutoffs 1..15, stored as doubles.
num_pcites_groups <- as.numeric(1:15)


library(purrr)


# Share of "Both" papers in each permuted Congress data set, among papers with
# a non-missing doi and at least `cutoff` policy citations.
#
# perm_dfs: list of tables with columns doi, num_pcites and party_cite.
# cutoff:   minimum num_pcites for a paper to be counted.
# Returns a tibble with one row per permutation: value, institution, threshold.
get_permute_at_cutoff <- function(perm_dfs, cutoff) {
  bipartisan_share <- function(perm_df) {
    eligible <- dplyr::filter(perm_df, !is.na(doi), num_pcites >= cutoff)
    sum(eligible$party_cite == "Both") / nrow(eligible)
  }

  tibble(
    "value" = unlist(lapply(perm_dfs, bipartisan_share)),
    "institution" = "Congress",
    "threshold" = cutoff
  )
}

# Observed share of "Both" papers in the Congress data among papers with at
# least `cutoff` policy citations.
#
# data:   citation-level table with columns doi, num_pcites and party_control.
# cutoff: minimum num_pcites for a paper to be counted.
# Returns a one-row tibble with columns value and threshold.
get_observed_at_cutoff <- function(data, cutoff) {
  # Rows with a missing doi are excluded from the denominator as well as the
  # numerator; otherwise NA would be counted as one extra "paper" in the
  # denominator only, deflating the share (most visibly at high cutoffs).
  eligible <- filter(data, !is.na(doi), num_pcites >= cutoff)
  n_papers <- length(unique(eligible$doi))

  n_bipartisan <- observed_party_cong(data) %>%
    filter(!is.na(doi), party_cite == "Both", num_pcites >= cutoff) %>%
    nrow()

  tibble("value" = n_bipartisan / n_papers, "threshold" = cutoff)
}


# Permuted and observed bipartisan shares at every cutoff, stacked into one
# table each.
cong_perm_dfs <- bind_rows(
  lapply(num_pcites_groups, function(cutoff) get_permute_at_cutoff(cong_perm, cutoff))
)

cong_observed_dfs <- bind_rows(
  lapply(num_pcites_groups, function(cutoff) get_observed_at_cutoff(dat, cutoff))
)

# Median of the permuted shares per cutoff (the "expected" value).
cong_perm_means <- cong_perm_dfs %>%
  group_by(institution, threshold) %>%
  summarise(median_value = median(value))

threshold_bipartshare_plot_cong <- ggplot() +
  geom_point(
    data = cong_perm_means,
    aes(x = threshold, y = median_value, group = threshold, fill = "Expected"),
    color = "darkorange"
  ) +
  geom_point(
    data = cong_observed_dfs,
    aes(x = threshold, y = value, color = "Observed")
  ) +
  scale_fill_manual(name = "", values = c("Expected" = "darkorange")) +
  scale_color_manual(name = "", values = c("Observed" = "darkgreen")) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  labs(
    x = "Minimum number of policy citations",
    y = "Share of bipartisan papers",
    title = "Congress"
  ) +
  theme_minimal()

ggsave("Output/Fig2A.pdf", threshold_bipartshare_plot_cong, width = 6, height = 6)


#Think tanks
# Minimum-citation cutoffs 1..25 for the think-tank analysis, stored as doubles.
num_pcites_groups <- as.numeric(1:25)


library(purrr)

# Think-tank version (replaces the Congress definition of the same name):
# share of "Both" papers in each permuted data set, among papers with a
# non-missing dois_cited and at least `cutoff` policy citations.
#
# perm_dfs: list of tables with columns dois_cited, num_pcites and party_cite.
# cutoff:   minimum num_pcites for a paper to be counted.
# Returns a tibble with one row per permutation: value, institution, threshold.
get_permute_at_cutoff <- function(perm_dfs, cutoff) {
  bipartisan_share <- function(perm_df) {
    eligible <- dplyr::filter(perm_df, !is.na(dois_cited), num_pcites >= cutoff)
    sum(eligible$party_cite == "Both") / nrow(eligible)
  }

  tibble(
    "value" = unlist(lapply(perm_dfs, bipartisan_share)),
    "institution" = "Think Tank",
    "threshold" = cutoff
  )
}

# Think-tank version (replaces the Congress definition of the same name):
# observed share of "Both" papers among papers with a non-missing dois_cited
# and at least `cutoff` policy citations.
#
# data:   citation-level table with dois_cited, num_pcites and party_control.
# cutoff: minimum num_pcites for a paper to be counted.
# Returns a one-row tibble with columns value and threshold.
get_observed_at_cutoff <- function(data, cutoff) {
  eligible <- filter(data, !is.na(dois_cited), num_pcites >= cutoff)
  n_papers <- n_distinct(eligible$dois_cited)

  n_bipartisan <- nrow(
    filter(
      observed_party_tt(data),
      !is.na(dois_cited),
      party_cite == "Both",
      num_pcites >= cutoff
    )
  )

  tibble("value" = n_bipartisan / n_papers, "threshold" = cutoff)
}


# Permuted and observed bipartisan shares at every cutoff, stacked into one
# table each.
tt_perm_dfs <- bind_rows(
  lapply(num_pcites_groups, function(cutoff) get_permute_at_cutoff(tt_perm, cutoff))
)

tt_observed_dfs <- bind_rows(
  lapply(num_pcites_groups, function(cutoff) get_observed_at_cutoff(tt_dat, cutoff))
)

# Median of the permuted shares per cutoff (the "expected" value).
tt_perm_means <- tt_perm_dfs %>%
  group_by(institution, threshold) %>%
  summarise(median_value = median(value))

# Observed / expected ratio per cutoff (join keys are the shared column names).
tt_ratio <- left_join(tt_perm_means, tt_observed_dfs)
ratio <- tt_ratio$value / tt_ratio$median_value

threshold_bipartshare_plot_tt <- ggplot() +
  geom_point(
    data = tt_perm_means,
    aes(x = threshold, y = median_value, group = threshold, fill = "Expected"),
    color = "darkorange"
  ) +
  geom_point(
    data = tt_observed_dfs,
    aes(x = threshold, y = value, color = "Observed")
  ) +
  scale_fill_manual(name = "", values = c("Expected" = "darkorange")) +
  scale_color_manual(name = "", values = c("Observed" = "darkgreen")) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  labs(
    x = "Minimum number of policy citations",
    y = "Share of bipartisan papers",
    title = "Think Tanks"
  ) +
  theme_minimal()

ggsave("Output/Fig2B.pdf", threshold_bipartshare_plot_tt, width = 6, height = 6)


###### Congress Overlap Time



# (doi, congress) pairs, used to place each paper in the Congress(es) citing it.
dat_years <- dat %>%
  dplyr::select(doi, congress) %>%
  distinct()

# Panel a: papers with exactly two policy citations.
dat_2 <- dat %>%
  filter(num_pcites == 2) %>%
  group_by(doi) %>%
  summarise(
    party_cite = case_when(
      all(c("D", "R") %in% party_control) ~ "Both",
      "D" %in% party_control ~ "D",
      "R" %in% party_control ~ "R"
    )
  ) %>%
  left_join(dat_years)

year_citations2 <- dat_2 %>%
  group_by(congress, party_cite) %>%
  summarise(num_papers = n_distinct(doi)) %>%
  filter(congress > 106)

year_citations2$party_cite <- fct_relevel(year_citations2$party_cite, "D", "R", "Both")

two_overlap_bar <- ggplot(
  year_citations2,
  aes(x = congress, y = num_papers, fill = party_cite)
) +
  geom_col(position = "fill") +
  scale_fill_manual("Cited By", values = c("#156B90", "#9A3E25", "#666666")) +
  labs(
    x = "Policy document publication Congress",
    y = "Share of cited papers",
    title = "Papers cited 2 times in policy documents"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Panel b: papers with two or more policy citations.
dat_2plus <- dat %>%
  filter(num_pcites >= 2) %>%
  group_by(doi) %>%
  summarise(
    party_cite = case_when(
      all(c("D", "R") %in% party_control) ~ "Both",
      "D" %in% party_control ~ "D",
      "R" %in% party_control ~ "R"
    )
  ) %>%
  left_join(dat_years)

year_citations2plus <- dat_2plus %>%
  group_by(congress, party_cite) %>%
  summarise(num_papers = n_distinct(doi)) %>%
  filter(congress > 106)

year_citations2plus$party_cite <- fct_relevel(year_citations2plus$party_cite, "D", "R", "Both")

twoplus_overlap_bar <- ggplot(
  year_citations2plus,
  aes(x = congress, y = num_papers, fill = party_cite)
) +
  geom_col(position = "fill") +
  scale_fill_manual("Cited By", values = c("#156B90", "#9A3E25", "#666666")) +
  labs(
    x = "Policy document publication Congress",
    y = "Share of cited papers",
    title = "Papers cited 2 or more times in policy documents"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

library(cowplot)

cong_overlaptime <- cowplot::plot_grid(two_overlap_bar, twoplus_overlap_bar, labels = "auto")

ggsave("Output/FigS15.pdf", cong_overlaptime, width = 9, height = 5)




###### Think Tank Overlap Time

# (dois_cited, pub_year) pairs, used to place each paper in the year(s) of the
# documents citing it.
tt_dat_years <- tt_dat %>%
  dplyr::select(dois_cited, pub_year) %>%
  distinct()

# Panel a: papers with exactly two policy citations.
tt_dat_2 <- tt_dat %>%
  filter(num_pcites == 2) %>%
  group_by(dois_cited) %>%
  summarise(
    party_cite = case_when(
      all(c("L", "R") %in% party_control) ~ "Both",
      "L" %in% party_control ~ "L",
      "R" %in% party_control ~ "R"
    )
  ) %>%
  left_join(tt_dat_years)

year_citations2 <- tt_dat_2 %>%
  group_by(pub_year, party_cite) %>%
  summarise(num_papers = n_distinct(dois_cited)) %>%
  filter(pub_year > 1999)

year_citations2$party_cite <- fct_relevel(year_citations2$party_cite, "L", "R", "Both")

two_overlap_bar <- ggplot(
  year_citations2,
  aes(x = pub_year, y = num_papers, fill = party_cite)
) +
  geom_col(position = "fill") +
  scale_fill_manual("Cited By", values = c("#156B90", "#9A3E25", "#666666")) +
  labs(
    x = "Policy document publication year",
    y = "Share of cited papers",
    title = "Papers cited 2 times in policy documents"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Panel b: papers with two or more policy citations.
tt_dat_2plus <- tt_dat %>%
  filter(num_pcites >= 2) %>%
  group_by(dois_cited) %>%
  summarise(
    party_cite = case_when(
      all(c("L", "R") %in% party_control) ~ "Both",
      "L" %in% party_control ~ "L",
      "R" %in% party_control ~ "R"
    )
  ) %>%
  left_join(tt_dat_years)

year_citations2plus <- tt_dat_2plus %>%
  group_by(pub_year, party_cite) %>%
  summarise(num_papers = n_distinct(dois_cited)) %>%
  filter(pub_year > 1999)

year_citations2plus$party_cite <- fct_relevel(year_citations2plus$party_cite, "L", "R", "Both")

twoplus_overlap_bar <- ggplot(
  year_citations2plus,
  aes(x = pub_year, y = num_papers, fill = party_cite)
) +
  geom_col(position = "fill") +
  scale_fill_manual("Cited By", values = c("#156B90", "#9A3E25", "#666666")) +
  labs(
    x = "Policy document publication year",
    y = "Share of cited papers",
    title = "Papers cited 2 or more times in policy documents"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

library(cowplot)

tt_overlaptime <- cowplot::plot_grid(two_overlap_bar, twoplus_overlap_bar, labels = "auto")

# NOTE(review): the leading "t" in the file name looks like a typo (the
# Congress counterpart is written to "Output/FigS15.pdf") -- confirm before
# renaming, since anything that reads this file would need to change too.
ggsave("Output/tFigS16.pdf", tt_overlaptime, width = 9, height = 5)



# Build the paper-level and citation-level tables used by the t-tests below.

# Congress, paper level: attach citation counts and a 0/1 preprint flag.
party_cite <- party_cite %>% left_join(num_pcites)

party_cite <- party_cite %>%
  mutate(ispreprint = case_when(type == "preprint" ~ 1,
                                TRUE ~ 0))
library(ggpubr)
library(rstatix)
colnames(party_cite)

# Reload the full Congress citation table (earlier steps kept only a subset
# of its columns).
dat <- read_csv("Data/CongressDocumentsCitations.csv")
dat$doi <- dat$dois_cited

library(congressTools)
# lubridate is attached here, before the first year(ymd(...)) call. It used to
# be attached only further down, so that call relied on another package
# having attached lubridate already.
library(lubridate)
dat$pub_year <- year(ymd(dat$date))
dat$year <- congress_to_year(dat$congress) + .5

# Citation lag in years: Congress year (plus the half-year offset added above)
# minus the year parsed from `date`.
dat <- dat %>% mutate(cite_lag2 = year - pub_year)

# Congress, citation level: one row per (paper, policy document) with the
# paper's party label attached (join keys are the shared column names).
dat_cite_lag <- dat %>%
  dplyr::select(doi, policy_document_id, cite_lag2) %>%
  filter(!is.na(doi))
dat_cite_lag <- dat_cite_lag %>%
  left_join(unique(dplyr::select(party_cite, doi, party_cite)))

dat_cite_lag

# Think tanks, paper level: 0/1 preprint flag.
ideo_cite <- ideo_cite %>%
  mutate(ispreprint = case_when(type == "preprint" ~ 1,
                                TRUE ~ 0))

tt_docs$year <- year(ymd(tt_docs$date))

# Citation lag in years: pub_year minus the year parsed from `date`.
tt_docs <- tt_docs %>% mutate(cite_lag2 = pub_year - year)

# Think tanks, citation level, with the paper's label and citation count.
tt_dat_cite_lag <- tt_docs %>%
  dplyr::select(dois_cited, policy_document_id, cite_lag2) %>%
  filter(!is.na(dois_cited))
tt_dat_cite_lag <- tt_dat_cite_lag %>%
  left_join(unique(dplyr::select(ideo_cite, dois_cited, party_cite, num_pcites)))

dat_cite_lag
tt_dat_cite_lag
party_cite
ideo_cite

###### Main dataset created for analysis
# 
# party_tp <- addmargins(table(party_cite$twocites, party_cite$partisan))
# 
# ideo_tp <- addmargins(table(ideo_cite$twocites, ideo_cite$partisan))
# 
# 
# #baserates for two cited papers
# #party_tp[2,3]/party_tp[3,3]
# 
# congress_bishare <- party_tp[2,1]/party_tp[2,3]
# 
# 
# #ideo_tp[2,3]/ideo_tp[3,3]
# 
# tt_bishare <- ideo_tp[2,1]/ideo_tp[2,3]
# 
# bishare_actual <- bind_cols("bishare" = c(congress_bishare, tt_bishare), "name" = c("Congress", "ThinkTank"), type = "Observed")
# perm_nulls$type <- "Null Distribution\n(permuted)"
# library(ggdist)
#     
# bipartisan_share_nulls <- perm_nulls  %>% ggplot(aes(x=value, color = type, fill = type), y = 0) + 
#   stat_slabinterval(slab_alpha = .5) + theme_classic() + 
# geom_vline(dat=bishare_actual, aes(xintercept = bishare, color = type, fill = type), linetype = 3) + facet_wrap(~name)  + 
#   theme(legend.position = "top", 
#         axis.text.y=element_blank(),
#         axis.ticks.y=element_blank()) +
#   geom_point(dat= bishare_actual, aes(x=bishare), y = 0, size = 3) +xlab("Share of Papers with Bipartisan Citations\n(Among papers receiving two citations)") +ylab("")
# 
# ggsave("BipartisanShareNulls.pdf", bipartisan_share_nulls, width = 7, height = 3)
# 

# Congress: t-test of citation lag (cite_lag2) between citations of papers
# labelled "D" and papers labelled "R", plus bracket positions for plotting.
stat.test_citelag_perc_cong <- dat_cite_lag %>%
  filter(party_cite %in% c("D", "R")) %>%
  rstatix::t_test(cite_lag2 ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
citelag_perc_cong <- dat_cite_lag %>%
  filter(party_cite %in% c("D", "R")) %>%
  ggplot(aes(x = party_cite, y = cite_lag2, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Dem", "Rep"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Cite lag\n(Years)") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_citelag_perc_cong,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 8.5,
    step.increase = .0003,
    tip.length = .0002
  )




# Think tanks: t-test of citation lag (cite_lag2) between citations of papers
# labelled "L" and papers labelled "R", plus bracket positions for plotting.
stat.test_citelag_perc_tt <- tt_dat_cite_lag %>%
  filter(party_cite %in% c("L", "R")) %>%
  rstatix::t_test(cite_lag2 ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
citelag_perc_tt <- tt_dat_cite_lag %>%
  # The stray empty first argument previously passed to filter() is removed.
  filter(party_cite %in% c("L", "R")) %>%
  ggplot(aes(x = party_cite, y = cite_lag2, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Left", "Right"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Cite lag\n(Years)") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_citelag_perc_tt,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 11.2,
    step.increase = .0009,
    tip.length = .0002
  )



# Congress: t-test of hit_5 between papers labelled "D" and papers labelled
# "R", plus bracket positions for plotting.
stat.test_hit_perc_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  rstatix::t_test(hit_5 ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
hit_perc_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  ggplot(aes(x = party_cite, y = hit_5, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Dem", "Rep"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Percent Hit 5", labels = scales::percent) +
  xlab("") +
  stat_pvalue_manual(
    stat.test_hit_perc_cong,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = .49,
    step.increase = .005,
    tip.length = .0009
  )


# Think tanks: t-test of hit_5 between papers labelled "L" and papers labelled
# "R", plus bracket positions for plotting.
stat.test_hit_perc_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  rstatix::t_test(hit_5 ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
hit_perc_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  ggplot(aes(x = party_cite, y = hit_5, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Left", "Right"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Percent Hit 5", labels = scales::percent) +
  xlab("") +
  stat_pvalue_manual(
    stat.test_hit_perc_tt,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = .46,
    step.increase = .02,
    tip.length = .005
  )


# Congress: t-test of field_citation_ratio between papers labelled "D" and
# papers labelled "R".
stat.test_cited_perc_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  rstatix::t_test(field_citation_ratio ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance()
# Bracket positions are added to THIS test result. The line previously
# re-processed stat.test_hit_perc_tt (a copy-paste slip), leaving the
# Congress FCR test without xy positions.
stat.test_cited_perc_cong <- stat.test_cited_perc_cong %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
cited_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  ggplot(aes(x = party_cite, y = field_citation_ratio, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Dem", "Rep"), values = c("#156B90", "#9A3E25")) +
  ylab("Field Citation Ratio") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_cited_perc_cong,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 66,
    tip.length = .0001,
    step.increase = .0002
  )


# Same comparison on the log scale (Congress).
party_cite$log_field_citation_ratio <- log(party_cite$field_citation_ratio)

# Non-finite logs (e.g. log(0) = -Inf) are dropped before the t-test.
stat.test_cited_perc_log_cong <- party_cite %>%
  filter(is.finite(log_field_citation_ratio)) %>%
  filter(party_cite %in% c("D", "R")) %>%
  rstatix::t_test(log_field_citation_ratio ~ party_cite) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
log_cited_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  ggplot(aes(x = party_cite, y = log_field_citation_ratio, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Dem", "Rep"), values = c("#156B90", "#9A3E25")) +
  ylab("Log Field Citation Ratio") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_cited_perc_log_cong,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 2.5,
    tip.length = .0001,
    step.increase = .0002
  )


# Think tanks: t-test of field_citation_ratio between papers labelled "L" and
# papers labelled "R", plus bracket positions for plotting.
stat.test_cited_perc_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  rstatix::t_test(field_citation_ratio ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
# NOTE(review): the plot keeps only papers with num_pcites > 1, while the
# t-test above uses all "L"/"R" papers -- confirm which sample is intended.
cited_tt <- ideo_cite %>%
  filter(num_pcites > 1, party_cite %in% c("L", "R")) %>%
  ggplot(aes(x = party_cite, y = field_citation_ratio, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Left", "Right"), values = c("#156B90", "#9A3E25")) +
  ylab("Field Citation Ratio") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_cited_perc_tt,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 42,
    tip.length = .0001,
    step.increase = .0003
  )



# Same comparison on the log scale (think tanks).
ideo_cite$log_field_citation_ratio <- log(ideo_cite$field_citation_ratio)

# Non-finite logs (e.g. log(0) = -Inf) are dropped before the t-test.
stat.test_cited_perc_log_tt <- ideo_cite %>%
  filter(is.finite(log_field_citation_ratio)) %>%
  filter(party_cite %in% c("L", "R")) %>%
  rstatix::t_test(log_field_citation_ratio ~ party_cite) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
log_cited_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  ggplot(aes(x = party_cite, y = log_field_citation_ratio, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Left", "Right"), values = c("#156B90", "#9A3E25")) +
  ylab("Log Field Citation Ratio") +
  xlab("") +
  stat_pvalue_manual(
    stat.test_cited_perc_log_tt,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = 2.45,
    tip.length = .00004,
    step.increase = .0003
  )


library(cowplot)

# Side-by-side log-FCR panels (A: Congress, B: think tanks), saved as FigS28.
log_FCR <- cowplot::plot_grid(
  log_cited_cong,
  log_cited_tt,
  labels = c("A", "B")
)

ggsave(filename = "Output/FigS28.pdf", plot = log_FCR, width = 6, height = 3)


# Congress: 0/1 preprint indicator (1 only where type is exactly "preprint";
# a missing type counts as 0).
party_cite <- party_cite %>%
  mutate(ispreprint = as.numeric(type %in% "preprint"))

# t-test of the preprint share between papers labelled "D" and papers
# labelled "R", plus bracket positions for plotting.
stat.test_preprintperc_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  rstatix::t_test(ispreprint ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
preprintperc_cong <- party_cite %>%
  filter(party_cite %in% c("D", "R")) %>%
  ggplot(aes(x = party_cite, y = ispreprint, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Dem", "Rep"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Percent preprint", labels = scales::percent) +
  xlab("") +
  stat_pvalue_manual(
    stat.test_preprintperc_cong,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = .022,
    step.increase = .01,
    tip.length = .0005
  )


# Think tanks: 0/1 preprint indicator (1 only where type is exactly
# "preprint"; a missing type counts as 0).
ideo_cite <- ideo_cite %>%
  mutate(ispreprint = as.numeric(type %in% "preprint"))

# t-test of the preprint share between papers labelled "L" and papers
# labelled "R", plus bracket positions for plotting.
stat.test_preprintperc_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  rstatix::t_test(ispreprint ~ party_cite, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_xy_position(x = "party_cite")

# Group means with bootstrap confidence intervals; a significance bracket is
# drawn at a fixed height only when the adjusted p-value is significant.
preprintperc_tt <- ideo_cite %>%
  filter(party_cite %in% c("L", "R")) %>%
  ggplot(aes(x = party_cite, y = ispreprint, color = party_cite)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", size = 3) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size = 2, alpha = .65) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual("", labels = c("Left", "Right"), values = c("#156B90", "#9A3E25")) +
  scale_y_continuous("Percent preprint", labels = scales::percent) +
  xlab("") +
  stat_pvalue_manual(
    stat.test_preprintperc_tt,
    hide.ns = TRUE,
    label = "{p.adj.signif}",
    y.position = .04,
    step.increase = .003,
    tip.length = .001
  )



# One row of eight panels: the four Congress comparisons followed by the four
# think-tank comparisons, saved as FigS25.
pap_diff <- cowplot::plot_grid(
  hit_perc_cong,
  cited_cong,
  preprintperc_cong,
  citelag_perc_cong,
  hit_perc_tt,
  cited_tt,
  preprintperc_tt,
  citelag_perc_tt,
  nrow = 1,
  labels = "auto"
)

ggsave(filename = "Output/FigS25.pdf", plot = pap_diff, width = 11.73, height = 3)




# Collect the eight (non-logged) t-test results into one summary table.
t_test_out <- bind_rows(
  stat.test_preprintperc_tt,
  stat.test_preprintperc_cong,
  stat.test_cited_perc_tt,
  stat.test_cited_perc_cong,
  stat.test_hit_perc_tt,
  stat.test_hit_perc_cong,
  stat.test_citelag_perc_tt,
  stat.test_citelag_perc_cong
) %>%
  mutate(
    # Think-tank tests are the ones whose first group is "L".
    institution = case_when(
      group1 == "L" ~ "Think Tank",
      TRUE ~ "Congress"
    ),
    `.y.` = case_when(
      `.y.` == "ispreprint" ~ "Pre-print",
      `.y.` == "field_citation_ratio" ~ "FCR",
      `.y.` == "hit_5" ~ "Hit (5) Paper",
      `.y.` == "cite_lag2" ~ "Cite Lag"
    )
  ) %>%
  dplyr::select(institution, `.y.`, estimate1, estimate2, statistic, df, p)

# NOTE(review): "Mean_L" holds estimate1 for every row, which for the Congress
# rows is the "D" group mean -- confirm the header is acceptable as is.
names(t_test_out) <- c(
  "Institution",
  "Variable",
  "Mean_L",
  "Mean_R",
  "t-Statistic",
  "Deg. of Freedom",
  "p-value"
)

# Sort, then round the five numeric columns to 5 decimals (p-values below
# 0.000005 are therefore written as 0).
t_test_out <- t_test_out %>%
  arrange(Institution, Variable) %>%
  mutate(across(3:7, ~ round(.x, digits = 5)))

write_csv(t_test_out, "Output/Table1.csv")
